How many observations are mutated/not out of entire mira data?
- 5% of the dataset is composed of epitopes which are mutated in Omicron
VARIANT="BA2"
MIRA_DATASET %>% filter(Variant == VARIANT) %>% select(Omicron_mutated,Variant) %>% table
## Variant
## Omicron_mutated BA2
## OMICRON_MUTATED 4255
## WT 107682
MIRA_DATASET %>% filter(Variant == VARIANT) %>% select(Omicron_mutated,Variant) %>% table %>% prop.table()
## Variant
## Omicron_mutated BA2
## OMICRON_MUTATED 0.03801245
## WT 0.96198755
VARIANT="BA4"
MIRA_DATASET %>% filter(Variant == VARIANT) %>% select(Omicron_mutated,Variant) %>% table
## Variant
## Omicron_mutated BA4
## OMICRON_MUTATED 11548
## WT 100389
MIRA_DATASET %>% filter(Variant == VARIANT) %>% select(Omicron_mutated,Variant) %>% table %>% prop.table()
## Variant
## Omicron_mutated BA4
## OMICRON_MUTATED 0.1031652
## WT 0.8968348
VARIANT="BA5"
MIRA_DATASET %>% filter(Variant == VARIANT) %>% select(Omicron_mutated,Variant) %>% table
## Variant
## Omicron_mutated BA5
## OMICRON_MUTATED 4449
## WT 107488
MIRA_DATASET %>% filter(Variant == VARIANT) %>% select(Omicron_mutated,Variant) %>% table %>% prop.table()
## Variant
## Omicron_mutated BA5
## OMICRON_MUTATED 0.03974557
## WT 0.96025443
MIRA_DATASET %>% group_by(Omicron_mutated,Variant) %>% dplyr::summarise(n=n())%>% group_by(Variant)%>%mutate(Freq=n/sum(n))%>%
ggbarplot(x="Variant",y="Freq",fill="Omicron_mutated",position = position_dodge())
## `summarise()` has grouped output by 'Omicron_mutated'. You can override using the `.groups` argument.

What proportion of epitope repertoire are affected?
VARIANT="BA2"
MIRA_DATASET %>% filter(Variant == VARIANT) %>% select(Experiment, Peptide,Cohort, Omicron_mutated)%>% distinct() %>% group_by(Experiment,Cohort, Omicron_mutated)%>% dplyr::summarise(n=n())%>% mutate(Freq=n/sum(n))%>%ungroup %>%
tidyr::complete_(cols = c("Experiment", "Omicron_mutated"), fill = list(n=0, Freq=0))%>%
filter(Omicron_mutated == "OMICRON_MUTATED")%>% arrange(desc(Freq))%>%
ggbarplot(x="Experiment",y="Freq",fill="Cohort")+rotate_x_text()+ggtitle(VARIANT)
## `summarise()` has grouped output by 'Experiment', 'Cohort'. You can override using the `.groups` argument.

VARIANT="BA4"
MIRA_DATASET %>% filter(Variant == VARIANT) %>% select(Experiment, Peptide,Cohort, Omicron_mutated)%>% distinct() %>% group_by(Experiment,Cohort, Omicron_mutated)%>% dplyr::summarise(n=n())%>% mutate(Freq=n/sum(n))%>%ungroup %>%
tidyr::complete_(cols = c("Experiment", "Omicron_mutated"), fill = list(n=0, Freq=0))%>%
filter(Omicron_mutated == "OMICRON_MUTATED")%>% arrange(desc(Freq))%>%
ggbarplot(x="Experiment",y="Freq",fill="Cohort")+rotate_x_text()+ggtitle(VARIANT)
## `summarise()` has grouped output by 'Experiment', 'Cohort'. You can override using the `.groups` argument.

VARIANT="BA5"
MIRA_DATASET %>% filter(Variant == VARIANT) %>% select(Experiment, Peptide,Cohort, Omicron_mutated)%>% distinct() %>% group_by(Experiment,Cohort, Omicron_mutated)%>% dplyr::summarise(n=n())%>% mutate(Freq=n/sum(n))%>%ungroup %>%
tidyr::complete_(cols = c("Experiment", "Omicron_mutated"), fill = list(n=0, Freq=0))%>%
filter(Omicron_mutated == "OMICRON_MUTATED")%>% arrange(desc(Freq))%>%
ggbarplot(x="Experiment",y="Freq",fill="Cohort")+rotate_x_text()+ggtitle(VARIANT)
## `summarise()` has grouped output by 'Experiment', 'Cohort'. You can override using the `.groups` argument.

FITNESS_DATA_FULL = readRDS("LR_IMMUNO_FITNESS_DATA_TRAPP_SUBVARS.rds")
FITNESS_DATA_FULL=FITNESS_DATA_FULL %>% mutate(MT_ImmunogenicityPrediction = ifelse(OmicronPrediction<0.75, "Negative","Positive")) %>% mutate(WT_ImmunogenicityPrediction = ifelse(WuhanPrediction<0.75, "Negative","Positive"))
LR_IMMUNO=readRDS("LR_IMMUNO_FITNESS_DATA_PAN_PEPTIDE_SUBVAR.rds")%>% select(Peptide,VariantAlignment,LR_Immuno,Variant)
NON_MUTATED_LR_IMMUNO = data.table(Peptide=unique(MIRA_DATASET$Peptide),
VariantAlignment="NA",
LR_Immuno=0,
Variant = c("BA2","BA4","BA5"))
NON_MUTATED_LR_IMMUNO=NON_MUTATED_LR_IMMUNO %>% filter(!Peptide %in% LR_IMMUNO$Peptide)
LR_IMMUNO=LR_IMMUNO %>% rbind(NON_MUTATED_LR_IMMUNO)
#LR_IMMUNO=LR_IMMUNO %>% mutate(LossPMHC = ifelse(Peptide %in% LOSS_PMHC, TRUE, FALSE))
## Gives count, mean, standard deviation, standard error of the mean, and confidence interval (default 95%).
## data: a data frame.
## measurevar: the name of a column that contains the variable to be summariezed
## groupvars: a vector containing names of columns that contain grouping variables
## na.rm: a boolean that indicates whether to ignore NA's
## conf.interval: the percent range of the confidence interval (default is 95%)
summarySE <- function(data=NULL, measurevar, groupvars=NULL, na.rm=FALSE,
conf.interval=.95, .drop=TRUE) {
library(plyr)
# New version of length which can handle NA's: if na.rm==T, don't count them
length2 <- function (x, na.rm=FALSE) {
if (na.rm) sum(!is.na(x))
else length(x)
}
# This does the summary. For each group's data frame, return a vector with
# N, mean, and sd
datac <- ddply(data, groupvars, .drop=.drop,
.fun = function(xx, col) {
c(N = length2(xx[[col]], na.rm=na.rm),
mean = mean (xx[[col]], na.rm=na.rm),
sd = sd (xx[[col]], na.rm=na.rm)
)
},
measurevar
)
# Rename the "mean" column
datac <- rename(datac, c("mean" = measurevar))
datac$se <- datac$sd / sqrt(datac$N) # Calculate standard error of the mean
# Confidence interval multiplier for standard error
# Calculate t-statistic for confidence interval:
# e.g., if conf.interval is .95, use .975 (above/below), and use df=N-1
ciMult <- qt(conf.interval/2 + .5, datac$N-1)
datac$ci <- datac$se * ciMult
return(datac)
}
COHORT_LIST = MIRA_DATASET %>% select(Experiment,Cohort) %>% distinct()
VARIANT="BA2"
ERROR_DATASET <- summarySE(MIRA_DATASET %>% filter(Variant==VARIANT) %>% inner_join(LR_IMMUNO), measurevar="LR_Immuno", groupvars=c("Experiment"))
## ------------------------------------------------------------------------------
## You have loaded plyr after dplyr - this is likely to cause problems.
## If you need functions from both plyr and dplyr, please load plyr first, then dplyr:
## library(plyr); library(dplyr)
## ------------------------------------------------------------------------------
##
## Attaching package: 'plyr'
## The following object is masked from 'package:purrr':
##
## compact
## The following object is masked from 'package:XVector':
##
## compact
## The following object is masked from 'package:IRanges':
##
## desc
## The following object is masked from 'package:S4Vectors':
##
## rename
## The following object is masked from 'package:ggpubr':
##
## mutate
## The following objects are masked from 'package:dplyr':
##
## arrange, count, desc, failwith, id, mutate, rename, summarise,
## summarize
## Joining, by = c("Peptide", "Variant")
TCR_DT=ERROR_DATASET
LR_IMMUNO_PLT_MIRA_BA2=TCR_DT %>% inner_join(COHORT_LIST)%>%
ggplot(aes(x=reorder(Experiment, -LR_Immuno), y=LR_Immuno,fill=Cohort))+geom_bar(stat = "identity")+
geom_errorbar(aes(ymin=LR_Immuno-se, ymax=LR_Immuno+se), colour="black", width=.5, # Width of the error bars
position=position_dodge(.9))+theme_pubr(base_size = 16)+rotate_x_text()+xlab("Subject ID")+ggtitle(VARIANT)
## Joining, by = "Experiment"
LR_IMMUNO_PLT_MIRA_BA2

saveRDS(LR_IMMUNO_PLT_MIRA_BA2,file="LR_IMMUNO_PLT_MIRA_BA2.rds")
VARIANT="BA4"
ERROR_DATASET <- summarySE(MIRA_DATASET %>% filter(Variant==VARIANT) %>% inner_join(LR_IMMUNO), measurevar="LR_Immuno", groupvars=c("Experiment"))
## Joining, by = c("Peptide", "Variant")
TCR_DT=ERROR_DATASET
LR_IMMUNO_PLT_MIRA_BA4=TCR_DT %>% inner_join(COHORT_LIST)%>%
ggplot(aes(x=reorder(Experiment, -LR_Immuno), y=LR_Immuno,fill=Cohort))+geom_bar(stat = "identity")+
geom_errorbar(aes(ymin=LR_Immuno-se, ymax=LR_Immuno+se), colour="black", width=.5, # Width of the error bars
position=position_dodge(.9))+theme_pubr(base_size = 16)+rotate_x_text()+xlab("Subject ID")+ggtitle(VARIANT)
## Joining, by = "Experiment"
LR_IMMUNO_PLT_MIRA_BA4

saveRDS(LR_IMMUNO_PLT_MIRA_BA4,file="LR_IMMUNO_PLT_MIRA_BA4.rds")
VARIANT="BA5"
ERROR_DATASET <- summarySE(MIRA_DATASET %>% filter(Variant==VARIANT) %>% inner_join(LR_IMMUNO), measurevar="LR_Immuno", groupvars=c("Experiment"))
## Joining, by = c("Peptide", "Variant")
TCR_DT=ERROR_DATASET
LR_IMMUNO_PLT_MIRA_BA5=TCR_DT %>% inner_join(COHORT_LIST)%>%
ggplot(aes(x=reorder(Experiment, -LR_Immuno), y=LR_Immuno,fill=Cohort))+geom_bar(stat = "identity")+
geom_errorbar(aes(ymin=LR_Immuno-se, ymax=LR_Immuno+se), colour="black", width=.5, # Width of the error bars
position=position_dodge(.9))+theme_pubr(base_size = 16)+rotate_x_text()+xlab("Subject ID")+ggtitle(VARIANT)
## Joining, by = "Experiment"
saveRDS(LR_IMMUNO_PLT_MIRA_BA5,file="LR_IMMUNO_PLT_MIRA_BA5.rds")